# Session setup: clear the workspace, fix the RNG seed, and make sure every
# package used by this script is installed and attached.
rm(list = ls())
gc()
set.seed(63296)

# "ggrepel" is listed because the scatter plot below calls
# ggrepel::geom_text_repel(); without it the install step would not provide
# the package on a machine that does not have it yet.
packages <- c("foreign", "ggplot2", "readstata13", "stargazer", "tidyverse",
              "broom", "gridExtra", "ggrepel")

# Install whichever of the packages are not yet in the local library.
new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# library() stops with an error if a package cannot be loaded, whereas
# require() would only return FALSE and let the script fail later on.
invisible(lapply(packages, library, character.only = TRUE))
rm(packages, new.packages)

# Placeholder: replace with the project root before running.
setwd("PUT YOUR WORKING DIRECTORY HERE")


## Load the panel data set (path is relative to the working directory)
df <- read.csv("./Datasets/generalizability_dataset_final_panel_imputed.csv")

# Drop Russia and keep only the variables used further down
data <- df %>%
  filter(CTRY != "Russia") %>%
  dplyr::select(protest_, SURVEIL, DISAP, KILL, POLPRIS, TORT, autoc, CTRY,
                YEAR, pop_, event_, imputed)

## Replace negative entries (e.g. the -999 code) with the column mean.
## The mean is taken over the entries greater than -1, ignoring NAs; entries
## that are NA themselves are left untouched by the replacement.
for (code_col in c("autoc", "DISAP", "KILL", "POLPRIS", "TORT")) {
  column_values <- data[[code_col]]
  valid_mean <- mean(column_values[column_values > -1], na.rm = TRUE)
  data[[code_col]][column_values < 0] <- valid_mean
}
rm(code_col, column_values, valid_mean)

# Normalize protest counts by the overall number of reported events
data$PROTESTS <- with(data, protest_ / event_)


####################
### Scatter Plot ###
####################

# Country-level means of every variable. CTRY itself is left out of the
# averaged columns: taking mean() of the country name only produces NA plus a
# warning per country. The grouping value is returned in column `Group.1`.
aggregate <- stats::aggregate(data[setdiff(names(data), "CTRY")],
                              by = list(data$CTRY), FUN = mean)

# Indicator used for colouring and labelling the scatter plot.
# NOTE: the coding is inverted relative to the variable name -- countries in
# this list get 0, all other countries get 1. The plot code selects
# `communist == 0`, so the coding is kept as is.
# "Russia" is filtered out of `data` earlier in the script, so its entry here
# never matches.
communist_countries <- c(
  "Poland", "Romania", "Hungary", "Armenia", "Azerbaijan", "Belarus",
  "Croatia", "Czech Republic", "Georgia", "Kazakhstan", "Kyrgyz Republic",
  "Latvia", "Lithuania", "Macedonia", "Moldova", "Russia", "Slovak Republic",
  "Tajikistan", "Ukraine", "Uzbekistan", "Albania"
)
aggregate$communist <- ifelse(aggregate$Group.1 %in% communist_countries, 0, 1)

# Focus on autocracies (mean split): keep countries whose average `autoc` is
# above the cross-country mean. which() drops rows where the comparison is NA.
autoc_cutoff <- mean(aggregate$autoc, na.rm = TRUE)
aggregated <- aggregate[which(aggregate$autoc > autoc_cutoff), ]

## Scatter plot of country means (autocracy subset) with a linear fit.
## Points are shaded by the `communist` indicator; the fitted line and its
## 95% confidence band are drawn in black over all plotted countries.
plot <- ggplot(aggregated,
               aes(x = SURVEIL, y = PROTESTS, col = factor(communist))) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, level = 0.95, col = "black") +
  xlab("Surveillance (1981 - 1986)") + ylab("Protests (1981 - 1986)") +
  # theme_bw() is a complete theme, so an earlier theme_minimal() call had no
  # effect on the result and has been removed.
  theme_bw() +
  scale_colour_grey() +
  theme(legend.position = "none") +
  # Label the countries coded communist == 0. The labels are taken from the
  # same subset that is plotted (`aggregated`), so that no label is drawn for
  # a country without a point and the axes are not stretched by such labels.
  ggrepel::geom_text_repel(
    data = subset(aggregated, communist == 0),
    aes(x = SURVEIL, y = PROTESTS, label = Group.1)
  )

# Show the figure on the active graphics device
plot

grid.arrange(plot, ncol = 1)

# Write the same figure to a PDF file (placeholder: replace with the output
# file path before running).
pdf("PUT YOUR DIRECTORY HERE", width = 4, height = 5)
grid.arrange(plot, ncol = 1)
dev.off()







